import scrapy
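# Instantiate MyspiderItem wherever a plain dict would otherwise be used: it restricts the allowed keys, which guards against mistyped field names.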
from myspider.items import MyspiderItem

class ItcastSpider(scrapy.Spider):
    """Spider that scrapes teacher entries from the itcast.cn teacher page.

    Every ``div`` with class ``li_txt`` in the response produces one
    ``MyspiderItem`` carrying the ``name``, ``title`` and ``desc`` fields.
    """

    name = 'itcast'
    # Domains this spider is allowed to crawl.
    allowed_domains = ['itcast.cn']
    # Page the crawl starts from.
    start_urls = ['https://www.itcast.cn/channel/teacher.shtml#ajavaee']

    def parse(self, response):
        """Yield one ``MyspiderItem`` per teacher node found in *response*.

        A field is set to ``None`` when its XPath matches nothing inside
        the teacher node.
        """
        # Item field -> XPath, relative to a single teacher node.
        field_paths = (
            ('name', './h3/text()'),
            ('title', './h4/text()'),
            ('desc', './p/text()'),
        )
        # xpath() returns a list of selectors, one per teacher node.
        teacher_nodes = response.xpath('//div[@class="li_txt"]')
        for node in teacher_nodes:
            teacher = MyspiderItem()
            for field, path in field_paths:
                # Only the first matching text node is kept; extract()
                # would be needed to collect every match instead.
                teacher[field] = node.xpath(path).extract_first()
            yield teacher
